libname home "/scratch/baruch/"; 

options compress=yes; 

%let timer = %sysfunc(datetime());
options nonotes;

%macro NASDAQ_after2015(year1, year2);

proc datasets nolist lib=work; delete _out; quit;

%do date = &year1 %to &year2;
%if %sysfunc(exist(taqmsec.nbbom_&date.)) %then %do;

proc datasets nolist lib=work; 
 delete _1 _2_1 _3_1 _2_2 _3_2 _3 _onday; 
run; quit;

data _1; set taqmsec.nbbom_&date.;
 keep DATE TIME_M symbol SYM_ROOT SYM_SUFFIX BEST_BIDEX BEST_BID BEST_ASKEX BEST_ASK;
 where time_m >= ('9:25:00.000000000't) and time_m <= ('16:05:00.000000000't) and 
       BEST_BIDEX in ("T" "Q") and BEST_ASKEX in ("T" "Q");
 symbol=catx(' ',sym_root, sym_suffix);
run;


********************************************************************************************;
********************************************************************************************;
***** time ended with 0 ********************************************************************;

%let t = %sysfunc(hms(9,30,0)); %let interval = 5*60; %let next = 10*60;
data _2_1; set _1; format time TIME20.9; 
 %do %until (&t. > %sysfunc(hms(16,00,0)));
  %let start = %sysevalf(&t. - &interval.); %let end = %sysevalf(&t. + &interval.);   
  if time_m >= &start. and time_m < &end. then do; time = &t.; diff_sec =  abs(time_m - time); end;
  %let t = %sysevalf(&t. + &next.);
 %end;
run;

proc sql;
 create table _3_1 as
 select distinct date, time, time_m, symbol, BEST_BID, BEST_BIDEX, BEST_ASK, BEST_ASKEX, (BEST_ASK-BEST_BID)/((BEST_ASK+BEST_BID)/2) as spread
 from _2_1
 group by date, symbol, time
 having diff_sec = min(diff_sec)
 order by date, symbol, time;
quit;

proc sort data = _3_1 nodupkey; by date symbol time; run;


********************************************************************************************;
********************************************************************************************;
***** time ended with 5 ********************************************************************;

%let t = %sysfunc(hms(9,35,0)); %let interval = 5*60; %let next = 10*60;
data _2_2; set _1; format time TIME20.9; 
 %do %until (&t. > %sysfunc(hms(15,55,0)));
  %let start = %sysevalf(&t. - &interval.); %let end = %sysevalf(&t. + &interval.);   
  if time_m >= &start. and time_m < &end. then do; time = &t.; diff_sec =  abs(time_m - time); end;
  %let t = %sysevalf(&t. + &next.);
 %end;
run;

proc sql;
 create table _3_2 as
 select distinct date, time, time_m, symbol, BEST_BID, BEST_BIDEX, BEST_ASK, BEST_ASKEX, (BEST_ASK-BEST_BID)/((BEST_ASK+BEST_BID)/2) as spread
 from _2_2
 group by date, symbol, time
 having diff_sec = min(diff_sec)
 order by date, symbol, time;
quit;

proc sort data = _3_2 nodupkey; by date symbol time; run;


********************************************************************************************;
********************************************************************************************;
***** merge together 0 with 5 **************************************************************;

data _3; set _3_1 _3_2; run;
proc sort data = _3; by date symbol time; run;

proc sql;
	create table _oneday as
	select a.*, b.cusip
	from _3 a left join taqmsec.mastm_&date. as b
	on a.date=b.date and a.symbol=b.symbol_15 
	order by date, symbol, time;
quit;

proc datasets nolist lib=work; append base=_out data=_oneday force; run; quit;
%end; %end;

%mend;

%NASDAQ_after2015(year1=20150101, year2=20211231);

/* save data to home dictionary */
data home.NASDAQ_2015to21; set _out; where time is not missing; run;

options notes;
data _null_;
dur = datetime() - &timer;
put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
run; 
